# --- HTML-export presentation setup ---
# This line will hide code by default when the notebook is exported as HTML
import IPython.core.display as di
di.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery("div.input").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)
# This line will add a button to toggle visibility of code blocks, for use with the HTML export version
di.display_html('''<button onclick="jQuery('div.input').toggle(); jQuery('.prompt').toggle();">Toggle code</button>''', raw=True)
# NOTE(review): duplicate import — IPython.core.display is already bound to `di` above.
import IPython.core.display as di
# Display the Orbital Insight header image at the top of the notebook.
di.Image('/home/jovyan/demos/assets/oi_header.png')
This notebook demonstrates an alternative means of change detection between 2 periods. We ran the landuse aggregation algorithm for 2 separate periods - different ranges of months - and then compared building footprints between the 2 sets of results to see if there were any large changes.
The Orbital Insight landuse aggregation algorithm takes all available imagery within a specified period, and aggregates the results of each image to produce an overall landuse classification for that period. The aggregation process is helpful for achieving higher valid coverage of your AOIs, by combining the results from multiple images that might each have only partial AOI coverage due to clouds etc. Aggregation also smoothes out potentially noisy per-image results.
Running this properly requires a user has run and completed two separate land use aggregation (LU) projects in GO - one for pre-event and one for post-event. Both projects must be of the same AOI.
Here, we look at Khartoum, Sudan in the second half of 2017 (This time range can be adjusted based on GO projects created):
Here we use two GO land use aggregation projects: `Tl_rcsmJeq-200127` (first time range) and `v8JCM5wVb_-200127` (second time range). We can also provide a filter for areas which were too cloudy or otherwise non-viable in the time periods.
*Note: this workflow can work with either a single project or multiple projects for each time period.*
# This needs to be at the top of every notebook: load environment variables
# (API tokens etc.) from the shell profile into this process.
import envbash
envbash.load_envbash('/home/jovyan/.profile')
# This sets up the notebook to use the Go demo account (read-only) token, where
# the demo Bahamas projects are accessible
# *Remove this if you are modifying this notebook for your own projects*
#import os
#os.environ['GO_API_TOKEN'] = os.environ['GO_DEMO_API_TOKEN']
# Other required imports
import pandas as pd
import geopandas as gpd
# NOTE(review): the shapely WKT loader is imported three times below under
# different names (shapely_loads, loads, shapely.wkt); only one is needed.
from shapely.wkt import loads as shapely_loads
from shapely.wkt import loads
from shapely.geometry import shape
import shapely.wkt
from shapely.geometry import Point, Polygon
import plotly.offline as py
import plotly.graph_objs as go
import statsmodels as sm
import plotly.express as px
import numpy as np
# go_utils / oi_colors are Orbital Insight helper packages available in this
# demo environment.
from go_utils import GoProject
import oi_colors as oic
# This is our primary function. Run this cell, do not change it.
def calculate_two_period_bldg_intersects(pre_bldgs, post_bldgs):
    """Cross-intersect building footprints from two time periods.

    For every pre-event building, measure how much of its footprint is still
    covered by post-event buildings ("is it still there?"), and for every
    post-event building, how much was already covered by pre-event buildings
    ("was it there before?").

    Note: both input GeoDataFrames are modified in place (id, area and
    overlap columns are added); they are also returned for convenience.

    Parameters
    ----------
    pre_bldgs : geopandas.GeoDataFrame
        Building polygons from the first (pre-event) time range.
    post_bldgs : geopandas.GeoDataFrame
        Building polygons from the second (post-event) time range.

    Returns
    -------
    tuple of geopandas.GeoDataFrame
        (pre_bldgs, post_bldgs) with ``*_bldg_id``, ``*_bldg_area``,
        ``*_overlap_area`` and ``*_percent_overlap`` columns populated.
    """
    # Give every building a stable id and record its footprint area.
    pre_bldgs['pre_bldg_id'] = pre_bldgs.index
    pre_bldgs['pre_bldg_area'] = pre_bldgs.geometry.area
    post_bldgs['post_bldg_id'] = post_bldgs.index
    post_bldgs['post_bldg_area'] = post_bldgs.geometry.area

    # Pre -> post: intersect, then dissolve the intersection pieces back to
    # one geometry per pre-event building so its surviving area can be measured.
    pre_filtered = gpd.overlay(pre_bldgs, post_bldgs, how='intersection')
    pre_filtered = pre_filtered.set_geometry('geometry')
    pre_filtered = pre_filtered.dissolve(by='pre_bldg_id')
    pre_filtered['pre_bldg_id'] = pre_filtered.index
    pre_filtered['post_overlap_area'] = pre_filtered.geometry.area
    pre_overlap_area_dict = dict(zip(pre_filtered['pre_bldg_id'],
                                     pre_filtered['post_overlap_area']))
    # Buildings with no intersection at all get an overlap of 0, not NaN.
    # (The previous version pre-filled with np.nan and called fillna(0)
    # twice on this side; a single map().fillna(0) is equivalent and now
    # matches the post-event branch below.)
    pre_bldgs['post_overlap_area'] = (
        pre_bldgs['pre_bldg_id'].map(pre_overlap_area_dict).fillna(0))
    pre_bldgs['post_percent_overlap'] = (
        pre_bldgs['post_overlap_area'] / pre_bldgs['pre_bldg_area'])

    # Post -> pre: the mirror image ("were they not there before?").
    post_filtered = gpd.overlay(post_bldgs, pre_bldgs, how='intersection')
    post_filtered = post_filtered.set_geometry('geometry')
    post_filtered = post_filtered.dissolve(by='post_bldg_id')
    post_filtered['post_bldg_id'] = post_filtered.index
    post_filtered['pre_overlap_area'] = post_filtered.geometry.area
    post_overlap_area_dict = dict(zip(post_filtered['post_bldg_id'],
                                      post_filtered['pre_overlap_area']))
    post_bldgs['pre_overlap_area'] = (
        post_bldgs['post_bldg_id'].map(post_overlap_area_dict).fillna(0))
    post_bldgs['pre_percent_overlap'] = (
        post_bldgs['pre_overlap_area'] / post_bldgs['post_bldg_area'])

    # All done!
    return pre_bldgs, post_bldgs
# Input project_id for pre- and post-event LU projects. In this case "pre" and
# "post" mean the first and second time ranges, respectively. Modify as needed.
print('Getting pre-event landuse results (first set)...')
pre_event_project = GoProject('Tl_rcsmJeq-200127')
pre_event_results = pre_event_project.get_landuse_agg_results()
print('Getting post-event landuse results...')
post_event_project = GoProject('v8JCM5wVb_-200127')
post_event_results = post_event_project.get_landuse_agg_results()
Data is returned as tile-level geometries. (The AOI is tiled up by Orbital Insight; tiling makes it easier to handle large AOIs and geometries.)
`tile_buildings` timeseries: building polygons detected by the landuse aggregation algorithm. `tile_valid_info` timeseries: valid area polygons. Valid areas denote parts of the AOI that were covered by non-cloudy imagery (i.e. there was at least 1 satellite image covering that area, and that part of the image was not cloudy).
# Get GeoDataFrames, drop empty geometries
# There should be one of these for each project and each class you are working with. Adjust as needed.
# For each timeseries: drop rows with empty geometries, then explode any
# multi-part geometries into one row per polygon.
gdf_pre_buildings = pre_event_results.get_timeseries('tile_buildings')
gdf_pre_buildings = gdf_pre_buildings[~gdf_pre_buildings.is_empty]. \
explode().reset_index(drop=True)
gdf_post_buildings = post_event_results.get_timeseries('tile_buildings')
gdf_post_buildings = gdf_post_buildings[~gdf_post_buildings.is_empty]. \
explode().reset_index(drop=True)
gdf_pre_valid = pre_event_results.get_timeseries('tile_valid_info')
gdf_pre_valid = gdf_pre_valid[~gdf_pre_valid.is_empty]. \
explode().reset_index(drop=True)
gdf_post_valid = post_event_results.get_timeseries('tile_valid_info')
gdf_post_valid = gdf_post_valid[~gdf_post_valid.is_empty]. \
explode().reset_index(drop=True)
# This step is only necessary if more than one project was run for any of the time periods. Adjust as needed.
# In this case, we're adding the two buildings/valid area pulls for pre-event, and don't need to do anything for post-event.
#pre_buildings_dfs = [gdf_pre_buildings1, gdf_pre_buildings2]
#pre_valid_dfs = [gdf_pre_valid1, gdf_pre_valid2]
# post_buildings_dfs = []
# post_valid_dfs = []
# Here we combine those that need combining into one seamless data set. Again, adjust as needed.
# In this case, we're only doing it for the pre-event data sets, with the lists we made right before this.
#gdf_pre_buildings = gpd.GeoDataFrame(pd.concat(pre_buildings_dfs, ignore_index=True), crs=pre_buildings_dfs[0].crs, geometry='geom')
#gdf_pre_valid = gpd.GeoDataFrame(pd.concat(pre_valid_dfs, ignore_index=True), crs=pre_valid_dfs[0].crs, geometry='geom')
# gdf_post_buildings = gpd.GeoDataFrame(pd.concat(post_buildings_dfs, ignore_index=True), crs=post_buildings_dfs[0].crs, geometry='geom')
# gdf_post_valid = gpd.GeoDataFrame(pd.concat(post_valid_dfs, ignore_index=True), crs=post_valid_dfs[0].crs, geometry='geom')
# After this, there is no more customization needed in the core workflow!
# Note: the function also mutates gdf_pre_buildings / gdf_post_buildings in
# place (it adds id/area/overlap columns) in addition to returning them.
pre_bldgs_populated, post_bldgs_populated = calculate_two_period_bldg_intersects(gdf_pre_buildings, gdf_post_buildings)
# Checking next for validity in either direction: 'Which of these are not
# obstructed by clouds in the other dataset?' — so that a building missing
# from the other period can be distinguished from one that was simply
# unobservable (cloudy/no imagery).
gdf_pre_to_post_valid_buildings = gpd.overlay(pre_bldgs_populated, gdf_post_valid, how='intersection')
gdf_post_to_pre_valid_buildings = gpd.overlay(post_bldgs_populated, gdf_pre_valid, how='intersection')
# Building ids that have at least some valid coverage in the other period.
pre_to_post_valid_buildings = gdf_pre_to_post_valid_buildings['pre_bldg_id'].unique()
post_to_pre_valid_buildings = gdf_post_to_pre_valid_buildings['post_bldg_id'].unique()
# Mark buildings as valid (observable in the other period). Use the
# vectorized Series.isin() instead of apply(lambda x: x in ids), which did
# an O(n) scan of the id array for every single building.
pre_bldgs_populated['valid'] = pre_bldgs_populated['pre_bldg_id'].isin(pre_to_post_valid_buildings)
post_bldgs_populated['valid'] = post_bldgs_populated['post_bldg_id'].isin(post_to_pre_valid_buildings)
pre_bldgs_populated: GeoDataFrame of all buildings seen in time range one.
The `valid` column denotes whether the building was in the valid (non-cloudy) area in time range two. The `post_percent_overlap` column denotes the percentage of the time-range-one building footprint that remains optically recognizable in the time-range-two building dataset. The higher the percentage, the more likely it is that the building remains fully intact; the lower, the more likely it no longer exists in an optically recognizable condition. This is particularly useful for post-disaster damage assessments, or generalized de-construction.
post_bldgs_populated: GeoDataFrame of all buildings seen in time range two.
The `valid` column denotes whether the building was in the valid (non-cloudy) area in time range one. The `pre_percent_overlap` column denotes the percentage of the time-range-two building footprint that was already optically recognizable in the time-range-one building dataset. The higher the percentage, the more likely it is that this building already existed before the event; the lower, the more likely it is that it is a new or expanded feature. This is particularly useful for evaluating disaster recovery, population dispersal, or urban growth/expansion.
We can explore the distribution of overlapping buildings between the two time periods, as shown below, which may help power a more rigorous, data-science-heavy approach by some users.
# This may be a useful type of chart for evaluating the impact of a scenario like this one:
# histogram of how much of each pre-event footprint survives into period two,
# split by whether the building was observable ('valid') in period two.
px.histogram(pre_bldgs_populated, x='post_percent_overlap', color = 'valid', title='Distribution of previously existent building footprints remainder in second time range')
We can quickly assess disparate damages across multiple AOIs if needed.
# # This loop will generate a distribution chart specific to each AOI in the project.
# for aoi in pre_bldgs_populated['aoi_name'].unique():
#     fig = px.histogram(pre_bldgs_populated.loc[pre_bldgs_populated['aoi_name'] == aoi], x='post_percent_overlap', color = 'valid', title='Distribution of previously existent building footprints in {} in time range two'.format(aoi))
#     fig.show()
# # This may be less useful for the Dorian example, but could be revisited to look for reconstruction or in different scenarios
# Mirror-image histogram: how much of each period-two footprint already
# existed in period one.
px.histogram(post_bldgs_populated, x='pre_percent_overlap', color = 'valid', title='Distribution of time range two building footprints compared to time range one status')
This workflow is elective; you may wish to do a more rigorous interpretation, bin through other means, simply go back to a binary classification, or look into all/nothing/some approaches.
# We'll first choose to bin this dataset into five equal-width intervals.
pre_bldgs_populated['bins'] = pd.cut(pre_bldgs_populated['post_percent_overlap'],bins=5)
pre_bldgs_populated['bins'] = pre_bldgs_populated['bins'].astype(str)
# Mark cloud-obstructed buildings separately so they are not mistaken for
# destroyed ones. Use a single df.loc[mask, col] assignment instead of the
# original chained df[col].loc[mask] form, which raises
# SettingWithCopyWarning and can silently fail to write back.
pre_bldgs_populated.loc[~pre_bldgs_populated['valid'], 'bins'] = 'not observed'
pre_bldgs_populated = pre_bldgs_populated.sort_values(by='bins').reset_index(drop=True)
# Subset into status-specific individual dataframes, for summary and also leaflet map (below)
# NOTE(review): the interval labels below assume pd.cut saw overlap values
# spanning 0-1; if the observed range differs, the labels differ and these
# subsets come up empty — verify against the histograms above.
gdf_unobserved = pre_bldgs_populated.loc[pre_bldgs_populated['bins'] == 'not observed']
gdf_remain = pre_bldgs_populated.loc[pre_bldgs_populated['bins'] == '(0.8, 1.0]']
gdf_mostly_remain = pre_bldgs_populated.loc[pre_bldgs_populated['bins'] == '(0.6, 0.8]']
gdf_partial = pre_bldgs_populated.loc[pre_bldgs_populated['bins'] == '(0.4, 0.6]']
gdf_mostly_gone = pre_bldgs_populated.loc[pre_bldgs_populated['bins'] == '(0.2, 0.4]']
gdf_gone = pre_bldgs_populated.loc[pre_bldgs_populated['bins'] == '(-0.001, 0.2]']
print('{} buildings pre-event: {} are <20% intact, {} are 20-40% intact, {} are 40-60% intact, {} are 60-80% intact, {} are >80% intact; {} are unobserved'.
format(len(pre_bldgs_populated), len(gdf_gone), len(gdf_mostly_gone), len(gdf_partial), len(gdf_mostly_remain), len(gdf_remain),
len(gdf_unobserved)))
We can export now! The CSV and geopackage (GPKG) files (buildings_status_all) will contain the same data. These will be good for rendering in QGIS/ArcGIS/etc.
We also provide an optional file (buildings_status_valid), with only buildings that had valid coverage post-event - ie only the buildings whose status we are able to verify post-event.
# # All buildings seen pre-event, with 'valid' & 'remain' status
# pre_bldgs_populated.to_csv('buildings_status_all.csv')
# pre_bldgs_populated.to_file('buildings_status_all.gpkg', driver='GPKG')
# # Only buildings with valid coverage post-event
# gdf_only_valid = pre_bldgs_populated[pre_bldgs_populated['valid']]
# gdf_only_valid.to_csv('buildings_status_valid.csv')
We have decided to bin the missing-vs-remaining buildings into 20% ranges. This is entirely up to user discretion. In this case, we can consider the following about our ranges:
We'll first offer a histogram summarizing these distributions across all tiles.
# Overall distribution of building status/bins across tiles
# (stacked per-bin counts, one bar per tile_id)
px.histogram(pre_bldgs_populated, x='tile_id', color='bins')
Next, we'll organize these into a new grouped/summarized dataframe for quicker access to those higher-level statistics. We can view what this cleaned up, macro-level chart looks like, below.
# One-hot encode the bin labels so per-tile counts can be computed with a
# simple groupby + sum.
dummies = pd.get_dummies(pre_bldgs_populated.loc[:, 'bins'], prefix=None, drop_first=False)
pre_bldgs_populated_dummies = pd.concat([pre_bldgs_populated, dummies], axis=1)
tile_grp = pre_bldgs_populated_dummies.groupby('tile_id')
# Per-tile summary: total building count plus a count per intactness bin.
# NOTE(review): assumes every interval label below exists in the data
# (see the binning step); a missing bin would raise KeyError here.
df_tile_summary = pd.DataFrame({
'total_buildings': tile_grp.size(),
'>80% intact': tile_grp['(0.8, 1.0]'].sum().astype(int),
'60-80% intact': tile_grp['(0.6, 0.8]'].sum().astype(int),
'40-60% intact': tile_grp['(0.4, 0.6]'].sum().astype(int),
'20-40% intact': tile_grp['(0.2, 0.4]'].sum().astype(int),
'<20% intact': tile_grp['(-0.001, 0.2]'].sum().astype(int)})
#'not_observed': tile_grp['not observed'].sum().astype(int)})
# df_tile_summary['missing_buildings'] = df_tile_summary['total_buildings'] - \
# df_tile_summary['remaining_buildings']
# Need tile_id as column for plotly express
df_tile_summary = df_tile_summary.reset_index()
df_tile_summary.head(10)
The first map is of individual building footprint geometries, colorized per those ranges.
Hint: zoom in to Khartoum and its populated areas to see building outlines.
# We'll first find the geometric mean (centroid) of all buildings. This will
# help us center the map on more heavily populated areas.
# Compute each geometry's centroid once (the previous iterrows loop called
# .centroid twice per row) and average the coordinates with comprehensions.
bldg_centroids = [geom.centroid for geom in pre_bldgs_populated['geom']]
bldgs_y_center = sum(pt.y for pt in bldg_centroids) / len(bldg_centroids)
bldgs_x_center = sum(pt.x for pt in bldg_centroids) / len(bldg_centroids)
# GeoJSON representation of the footprints, for the plotly choropleth layer.
bldg_geojson = gpd.GeoSeries(pre_bldgs_populated['geom']).__geo_interface__
# Please adjust zoom level, colorization, and any other parameters where needed.
# We'll also make sure to have the hover text indicate the 'bins', since that'll
# inform whether the building was actually "not observed" rather than truly
# observed missing.
fig = go.Figure(go.Choroplethmapbox(geojson = bldg_geojson,
locations = pre_bldgs_populated['pre_bldg_id'],
# Adjust the field you want to visualize the choropleth based on:
z = pre_bldgs_populated['post_percent_overlap'],
colorscale = "RdBu",
# reversescale=True,
# Adjust the title as needed, depending what you're visualizing:
colorbar_title = 'Building Status',
text = str('Status/Range: ') + pre_bldgs_populated['bins'].astype(str),
marker_line_color='darkgray',
marker_opacity = 0.8,
marker_line_width = 0.2))
fig.update_layout(mapbox_style="carto-positron",
mapbox_zoom=7,
mapbox_center = {"lat": bldgs_y_center,
"lon": bldgs_x_center})
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
# str('quarter_')+df['quarter'].astype(str)
This is very flexible, so we will also show these same figures by proportionality (i.e. those values within the overall number of previously observed buildings). These combined give a much more complete depiction of damages and give a better ability to triage attention.
# The next few blocks are to get us set up to actually view by tiles. Run them as is.
from oi_tiler import Tile
# Build an envelope polygon for every tile id in the summary table.
tile_geoms = []
for tile_id in df_tile_summary['tile_id']:
tile_geoms.append(Tile(tile_id).get_envelope_as_geometry(use_ogr=False))
df_tile_summary['tile_geom'] = tile_geoms
# Index by tile_id so the GeoJSON feature ids line up with the `locations`
# argument of the choropleth figures below.
df_tile_summary.index = df_tile_summary['tile_id']
geojson = gpd.GeoSeries(df_tile_summary['tile_geom']).__geo_interface__
# Here is where we set up which variables we may want to combine for those visualizations (this is entirely elective on what you want to visualize, and may not be necessary at all!)
# The first two are for the numbers in those two combined categories:
df_tile_summary['mostly_damaged'] = df_tile_summary['<20% intact'] + df_tile_summary['20-40% intact']
df_tile_summary['mostly_intact'] = df_tile_summary['>80% intact'] + df_tile_summary['60-80% intact']
# This next one is to get the proportionality of the first of those two:
df_tile_summary['proportion_damaged'] = df_tile_summary['mostly_damaged'] / df_tile_summary['total_buildings']
df_tile_summary['proportion_intact'] = df_tile_summary['mostly_intact'] / df_tile_summary['total_buildings']
# And here is where we prepare a hover-text summary string for each tile.
text_list = []
for index, row in df_tile_summary.iterrows():
text = ("{} buildings pre-event: {} <20% intact, {} 20-40% intact, {} 40-60% intact, {} 60-80% intact, {} >80% intact".#; {} unobserved".
format(row['total_buildings'], row['<20% intact'], row['20-40% intact'], row['40-60% intact'], row['60-80% intact'], row['>80% intact']))#, row['not_observed']))
text_list.append(text)
df_tile_summary['text'] = text_list
We'll first look at the distribution of most likely destroyed or damaged building areas. Respectively, the next two maps show tiles colorized by 1) the absolute number of such buildings, and 2) the proportion of such buildings across all buildings within the tile.
# Tile-level choropleth of the absolute count of mostly-damaged buildings.
# Zoom level, colour scale, and other parameters can be adjusted as needed.
damage_count_layer = go.Choroplethmapbox(
    geojson=geojson,
    locations=df_tile_summary['tile_id'],
    # Field that drives the choropleth colouring:
    z=df_tile_summary['mostly_damaged'],
    colorscale="Plasma",
    # reversescale=True,
    # Colour-bar title; change it to match whatever you visualize:
    colorbar_title='# of mostly missing buildings',
    text=df_tile_summary['text'],
    marker_line_color='white',
    marker_opacity=0.6,
    marker_line_width=0,
)
fig = go.Figure(damage_count_layer)
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=7,
    mapbox_center={"lat": bldgs_y_center, "lon": bldgs_x_center},
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()
# Tile-level choropleth of the PROPORTION of mostly-damaged buildings.
# Zoom level, colour scale, and other parameters can be adjusted as needed.
damage_proportion_layer = go.Choroplethmapbox(
    geojson=geojson,
    locations=df_tile_summary['tile_id'],
    # Field that drives the choropleth colouring:
    z=df_tile_summary['proportion_damaged'],
    colorscale="RdBu",
    reversescale=True,
    # Colour-bar title; change it to match whatever you visualize:
    colorbar_title='Proportion of missing buildings',
    text=df_tile_summary['text'],
    marker_opacity=0.75,
    marker_line_width=0,
)
fig = go.Figure(damage_proportion_layer)
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=7,
    mapbox_center={"lat": bldgs_y_center, "lon": bldgs_x_center},
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()
It may also be informative to look at the inverse, as below, showing where are the highest percentages of mostly intact buildings.
# We skip the total count of mostly-intact buildings (easy to add back in);
# this map shows the PROPORTION of mostly-intact buildings per tile.
intact_proportion_layer = go.Choroplethmapbox(
    geojson=geojson,
    locations=df_tile_summary['tile_id'],
    # Field that drives the choropleth colouring:
    z=df_tile_summary['proportion_intact'],
    colorscale="RdBu",
    # reversescale=True,
    # Colour-bar title; change it to match whatever you visualize:
    colorbar_title='Proportion of mostly intact buildings',
    text=df_tile_summary['text'],
    marker_opacity=0.6,
    marker_line_width=0,
)
fig = go.Figure(intact_proportion_layer)
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=7,
    mapbox_center={"lat": bldgs_y_center, "lon": bldgs_x_center},
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()
Some viewers or use cases may benefit from looking at building centroids instead of the entire footprint (as was visualized back on the very first map). This next block will export center-point versions of each building.
# copy poly to new GeoDataFrame
gdf_pre_buildings_points = pre_bldgs_populated.copy()
# change the geometry from footprint polygons to their centre points
gdf_pre_buildings_points.geometry = gdf_pre_buildings_points['geom'].centroid
# same crs as the source buildings
# NOTE(review): assigning .crs directly is deprecated in newer geopandas
# (prefer set_crs()); also, centroids computed in a geographic (lon/lat)
# CRS are approximate — confirm the CRS before relying on these points.
gdf_pre_buildings_points.crs = gdf_pre_buildings.crs
gdf_pre_buildings_points.to_csv('buildings_status_all_points.csv')